pacman::p_load(tmap,sf,tidyverse,knitr,h3jsr,dplyr, mapview)Take Home Exercise 1
Getting Started
Preparing the Flow Data
Importing the OD data
Firstly, we will import the Passenger Volume by Origin Destination Bus Stops data set downloaded from LTA DataMall by using read_csv() of readr package.
odbus <- read_csv("data/aspatial/origin_destination_bus_202308.csv")Check odbus tibble data frame that values in OROGIN_PT_CODE and DESTINATION_PT_CODE are in numeric data type.
glimpse(odbus)Rows: 5,709,512
Columns: 7
$ YEAR_MONTH <chr> "2023-08", "2023-08", "2023-08", "2023-08", "2023-…
$ DAY_TYPE <chr> "WEEKDAY", "WEEKENDS/HOLIDAY", "WEEKENDS/HOLIDAY",…
$ TIME_PER_HOUR <dbl> 16, 16, 14, 14, 17, 17, 17, 17, 7, 17, 14, 10, 10,…
$ PT_TYPE <chr> "BUS", "BUS", "BUS", "BUS", "BUS", "BUS", "BUS", "…
$ ORIGIN_PT_CODE <chr> "04168", "04168", "80119", "80119", "44069", "4406…
$ DESTINATION_PT_CODE <chr> "10051", "10051", "90079", "90079", "17229", "1722…
$ TOTAL_TRIPS <dbl> 7, 2, 3, 10, 5, 4, 3, 22, 3, 3, 7, 1, 3, 1, 3, 1, …
Origin & Destination Bus Stop Code
odbus$ORIGIN_PT_CODE <-
as.factor(odbus$ORIGIN_PT_CODE)
odbus$DESTINATION_PT_CODE <-
as.factor(odbus$DESTINATION_PT_CODE)Extracting the Study Data
Filter out data that belong to trips that occur on:
“Weekday” and “6-9am” (wdmp)
wdmp <- odbus %>% filter(DAY_TYPE == "WEEKDAY") %>% filter(TIME_PER_HOUR >= 6 & TIME_PER_HOUR <= 9) %>% group_by(ORIGIN_PT_CODE) %>% summarise(TRIPS = sum(TOTAL_TRIPS))“Weekday” and “5-8pm” (wdap)
wdap <- odbus %>% filter(DAY_TYPE == "WEEKDAY") %>% filter(TIME_PER_HOUR >= 17 & TIME_PER_HOUR <= 20) %>% group_by(ORIGIN_PT_CODE) %>% summarise(TRIPS = sum(TOTAL_TRIPS))“Weekends/Holiday” and “11am-2pm” (hmp)
hmp <- odbus %>% filter(DAY_TYPE == "WEEKENDS/HOLIDAY") %>% filter(TIME_PER_HOUR >= 11 & TIME_PER_HOUR <= 14) %>% group_by(ORIGIN_PT_CODE) %>% summarise(TRIPS = sum(TOTAL_TRIPS))“Weekends/Holiday” and “4pm-7pm” (hep)
hep <- odbus %>% filter(DAY_TYPE == "WEEKENDS/HOLIDAY") %>% filter(TIME_PER_HOUR >= 16 & TIME_PER_HOUR <= 19) %>% group_by(ORIGIN_PT_CODE) %>% summarise(TRIPS = sum(TOTAL_TRIPS))
Check resulting data tables
kable(head(wdmp)) | ORIGIN_PT_CODE | TRIPS |
|---|---|
| 01012 | 1973 |
| 01013 | 952 |
| 01019 | 1789 |
| 01029 | 2561 |
| 01039 | 2938 |
| 01059 | 1651 |
kable(head(wdap)) | ORIGIN_PT_CODE | TRIPS |
|---|---|
| 01012 | 8448 |
| 01013 | 7328 |
| 01019 | 3608 |
| 01029 | 9317 |
| 01039 | 12937 |
| 01059 | 2133 |
kable(head(hmp)) | ORIGIN_PT_CODE | TRIPS |
|---|---|
| 01012 | 2273 |
| 01013 | 1697 |
| 01019 | 1511 |
| 01029 | 3272 |
| 01039 | 5424 |
| 01059 | 1062 |
kable(head(hep))| ORIGIN_PT_CODE | TRIPS |
|---|---|
| 01012 | 3208 |
| 01013 | 2796 |
| 01019 | 1623 |
| 01029 | 4244 |
| 01039 | 7403 |
| 01059 | 1190 |
Output saved in rds format for future use
write_rds(wdmp, "data/rds/wdmp.rds")
write_rds(wdap, "data/rds/wdap.rds")
write_rds(hmp, "data/rds/hmp.rds")
write_rds(hep, "data/rds/hep.rds")Import the rds file into R environment
wdmp <- read_rds("data/rds/wdmp.rds")
wdap <- read_rds("data/rds/wdap.rds")
hmp <- read_rds("data/rds/hmp.rds")
hep <- read_rds("data/rds/hmp.rds")Working with Geospatial Data
Two geospatial data (shapefile) will be used for this exercise:
BusStop: Provides location of bus stop as at Q4 2022
MPSZ-2019: This data provides the sub-zone boundary of URA Master Plan 2019
Importing geospatial data
busstop <- st_read(dsn = "Data/geospatial",
layer = "BusStop") %>%
st_transform(crs = 3414)Reading layer `BusStop' from data source
`/Users/youting/ytquek/ISSS624/Project/data/geospatial' using driver `ESRI Shapefile'
Simple feature collection with 5161 features and 3 fields
Geometry type: POINT
Dimension: XY
Bounding box: xmin: 3970.122 ymin: 26482.1 xmax: 48284.56 ymax: 52983.82
Projected CRS: SVY21
mpsz <- st_read(dsn = "data/geospatial",
layer = "MPSZ-2019") %>%
st_transform(crs = 3414)Reading layer `MPSZ-2019' from data source
`/Users/youting/ytquek/ISSS624/Project/data/geospatial' using driver `ESRI Shapefile'
Simple feature collection with 332 features and 6 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: 103.6057 ymin: 1.158699 xmax: 104.0885 ymax: 1.470775
Geodetic CRS: WGS 84
Check structure of busstop and MPSZ sf tibble data frame
glimpse(busstop)Rows: 5,161
Columns: 4
$ BUS_STOP_N <chr> "22069", "32071", "44331", "96081", "11561", "66191", "2338…
$ BUS_ROOF_N <chr> "B06", "B23", "B01", "B05", "B05", "B03", "B02A", "B02", "B…
$ LOC_DESC <chr> "OPP CEVA LOGISTICS", "AFT TRACK 13", "BLK 239", "GRACE IND…
$ geometry <POINT [m]> POINT (13576.31 32883.65), POINT (13228.59 44206.38),…
glimpse(mpsz)Rows: 332
Columns: 7
$ SUBZONE_N <chr> "MARINA EAST", "INSTITUTION HILL", "ROBERTSON QUAY", "JURON…
$ SUBZONE_C <chr> "MESZ01", "RVSZ05", "SRSZ01", "WISZ01", "MUSZ02", "MPSZ05",…
$ PLN_AREA_N <chr> "MARINA EAST", "RIVER VALLEY", "SINGAPORE RIVER", "WESTERN …
$ PLN_AREA_C <chr> "ME", "RV", "SR", "WI", "MU", "MP", "WI", "WI", "SI", "SI",…
$ REGION_N <chr> "CENTRAL REGION", "CENTRAL REGION", "CENTRAL REGION", "WEST…
$ REGION_C <chr> "CR", "CR", "CR", "WR", "CR", "CR", "WR", "WR", "CR", "CR",…
$ geometry <MULTIPOLYGON [m]> MULTIPOLYGON (((33222.98 29..., MULTIPOLYGON (…
Geospatial Data Wrangling
Combining Busstop & mpsz
busstop_mpsz <- st_intersection(busstop, mpsz) %>%
select(BUS_STOP_N, SUBZONE_C) %>%
st_drop_geometry()Save output into rds format
write_rds(busstop_mpsz, "data/rds/busstop_mpsz.csv")Setting up the Hexagon Grid
Drawing the Hexagon Grid
Draw hexagon grid over the mpsz map
area_honeycomb_grid = st_make_grid(mpsz, c(250, 250), what = "polygons", square = FALSE)Convert the hexagon grid to sf (simple features) object and add a new column grid_id (sequential identifier) to it
honeycomb_grid_sf = st_sf(area_honeycomb_grid) %>%
# add grid ID
mutate(grid_id = 1:length(lengths(area_honeycomb_grid)))Determine which bus stops is contained within which hexagon using st_within.
busstop_honeycomb <- st_intersection(honeycomb_grid_sf,busstop) %>%
select(BUS_STOP_N, grid_id) %>%
st_drop_geometry()Save output into rds format
write_rds(busstop_honeycomb, "data/rds/busstop_honeycomb.csv")Check for duplicate records
duplicate <- busstop_honeycomb %>%
group_by_all() %>%
filter(n()>1) %>%
ungroup()Retain unique records only
busstop_honeycomb <- unique(busstop_honeycomb)Only include hexagon grid IDs that have bus stop numbers
busstop_honeycomb <- busstop_honeycomb %>%
filter(!is.na(grid_id) & grid_id > 0)Assign Each Bus Stop to a Grid ID
For all time periods, assign bus stops to a hexagon grid id. All NULL and 0 values are removed.
wdmp_gridid <- left_join(busstop_honeycomb, wdmp,
by = c("BUS_STOP_N" = "ORIGIN_PT_CODE"))
wdmp_gridid <- wdmp_gridid %>%
filter(!is.na(TRIPS) & TRIPS > 0)
wdap_gridid <- left_join(busstop_honeycomb, wdap,
by = c("BUS_STOP_N" = "ORIGIN_PT_CODE"))
wdap_gridid <- wdap_gridid %>%
filter(!is.na(TRIPS) & TRIPS > 0)
hmp_gridid <- left_join(busstop_honeycomb, hmp,
by = c("BUS_STOP_N" = "ORIGIN_PT_CODE"))
hmp_gridid <- hmp_gridid %>%
filter(!is.na(TRIPS) & TRIPS > 0)
hep_gridid <- left_join(busstop_honeycomb, hep,
by = c("BUS_STOP_N" = "ORIGIN_PT_CODE"))
hep_gridid <- hep_gridid %>%
filter(!is.na(TRIPS) & TRIPS > 0)Choropleth Visualisation
Weekday Morning Peak 6am-9am
Sum up the trips per hexagon
total_trips_by_grid <- wdmp_gridid %>%
group_by(grid_id) %>%
summarise(total_trips = sum(TRIPS, na.rm = TRUE))Merge geospatial data
total_trips_by_grid <- total_trips_by_grid %>%
left_join(honeycomb_grid_sf, by = c("grid_id" = "grid_id"))
total_trips_by_grid_sf <- st_sf(total_trips_by_grid)Plot the Choropleth map
tmap_mode("view")
map_honeycomb = tm_shape(total_trips_by_grid_sf) +
tm_fill(
col = "total_trips",
palette = "Reds",
style = "cont",
title = "Total Trips Taken - Weekday Morning Peak 6-9am",
id = "grid_id",
showNA = FALSE,
alpha = 0.6,
popup.vars = c(
"Number of trips: " = "total_trips"
),
popup.format = list(
total_trips = list(format = "f", digits = 0)
)
) +
tm_borders(col = "grey40", lwd = 0.4)
map_honeycomb<insert description of spatial patterns in 200 words>
Weekday Afternoon Peak 5pm-8pm
Sum up the trips per hexagon
total_trips_by_grid <- wdap_gridid %>%
group_by(grid_id) %>%
summarise(total_trips = sum(TRIPS, na.rm = TRUE))Merge geospatial data
total_trips_by_grid <- total_trips_by_grid %>%
left_join(honeycomb_grid_sf, by = c("grid_id" = "grid_id"))
total_trips_by_grid_sf <- st_sf(total_trips_by_grid)Plot the Choropleth map
tmap_mode("view")
map_honeycomb = tm_shape(total_trips_by_grid_sf) +
tm_fill(
col = "total_trips",
palette = "Reds",
style = "cont",
title = "Total Trips Taken - Weekday Morning Peak 6-9am",
id = "grid_id",
showNA = FALSE,
alpha = 0.6,
popup.vars = c(
"Number of trips: " = "total_trips"
),
popup.format = list(
total_trips = list(format = "f", digits = 0)
)
) +
tm_borders(col = "grey40", lwd = 0.4)
map_honeycomb<insert description of spatial patterns in 200 words>
Weekend/Holiday Morning Peak 11am-2pm
Sum up the trips per hexagon
total_trips_by_grid <- hmp_gridid %>%
group_by(grid_id) %>%
summarise(total_trips = sum(TRIPS, na.rm = TRUE))Merge geospatial data
total_trips_by_grid <- total_trips_by_grid %>%
left_join(honeycomb_grid_sf, by = c("grid_id" = "grid_id"))
total_trips_by_grid_sf <- st_sf(total_trips_by_grid)Plot the Choropleth map
tmap_mode("view")
map_honeycomb = tm_shape(total_trips_by_grid_sf) +
tm_fill(
col = "total_trips",
palette = "Greens",
style = "cont",
title = "Total Trips Taken - Weekday Morning Peak 6-9am",
id = "grid_id",
showNA = FALSE,
alpha = 0.6,
popup.vars = c(
"Number of trips: " = "total_trips"
),
popup.format = list(
total_trips = list(format = "f", digits = 0)
)
) +
tm_borders(col = "grey40", lwd = 0.4)
map_honeycomb<insert description of spatial patterns in 200 words>
Collate hexagon data for Weekend/Holiday Evening Peak 4pm-7pm
Sum up the trips per hexagon
total_trips_by_grid <- hep_gridid %>%
group_by(grid_id) %>%
summarise(total_trips = sum(TRIPS, na.rm = TRUE))Merge geospatial data
total_trips_by_grid <- total_trips_by_grid %>%
left_join(honeycomb_grid_sf, by = c("grid_id" = "grid_id"))
total_trips_by_grid_sf <- st_sf(total_trips_by_grid)Plot the Choropleth map
tmap_mode("view")
map_honeycomb = tm_shape(total_trips_by_grid_sf) +
tm_fill(
col = "total_trips",
palette = "Greens",
style = "cont",
title = "Total Trips Taken - Weekday Morning Peak 6-9am",
id = "grid_id",
showNA = FALSE,
alpha = 0.6,
popup.vars = c(
"Number of trips: " = "total_trips"
),
popup.format = list(
total_trips = list(format = "f", digits = 0)
)
) +
tm_borders(col = "grey40", lwd = 0.4)
map_honeycomb<insert description of spatial patterns in 200 words>
Local Indicators of Spatial Association (LISA) Analysis
Compute LISA of the passengers trips generate by origin at hexagon level.
Display the LISA maps of the passengers trips generate by origin at hexagon level. The maps should only display the significant (i.e. p-value < 0.05)
With reference to the analysis results, draw statistical conclusions (not more than 200 words per visual).